/* Name: usbdrvasm18.inc
 * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers
 * Author: Lukas Schrittwieser (based on 20 MHz usbdrvasm20.inc by Jeroen Benschop)
 * Creation Date: 2009-01-20
 * Tabsize: 4
 * Copyright: (c) 2008 by Lukas Schrittwieser and OBJECTIVE DEVELOPMENT Software GmbH
 * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt)
 */

/* Do not link this file! Link usbdrvasm.S instead, which includes the
 * appropriate implementation!
 */

/*
General Description:
This file is the 18 MHz version of the assembler part of the USB driver. It
requires an 18 MHz crystal (not a ceramic resonator and not a calibrated RC
oscillator).

See usbdrv.h for a description of the entire driver.

Since almost all of this code is timing critical, don't change unless you
really know what you are doing! Many parts require not only a maximum number
of CPU cycles, but even an exact number of cycles!

Note: This version is smaller than usbdrvasm18-crc.inc because it does without
the CRC table. It is therefore suitable for boot loaders on boards @ 18 MHz.
However, it is not as small as it could be, because loops are unrolled in the
same way as in usbdrvasm18-crc.inc. There is room for optimization.
*/


;max stack usage: [ret(2), YL, SREG, YH, [sofError], bitcnt(x5), shift, x1, x2, x3, x4, cnt] = 12 bytes
;nominal frequency: 18 MHz -> 12 cycles per bit
; Numbers in brackets are clocks counted from center of last sync bit
; when instruction starts
;register use in receive loop to receive the data bytes:
; shift assembles the byte currently being received
; x1 holds the D+ and D- line state
; x2 holds the previous line state
; cnt holds the number of bytes left in the receive buffer
; x4 is used as temporary register
; x3 is used for unstuffing: when unstuffing, the last received bit is inverted in shift (to prevent further
;    unstuffing calls). At the same time the corresponding bit in x3 is cleared to mark the bit as being inverted
; note: the code in this file never accesses ZL or ZH; the two lines below look like leftovers from usbdrvasm18-crc.inc
; zl lower crc value and crc table index
; zh used for crc table accesses



; POP_STANDARD restores the seven working registers saved by the receiver
; (pushed in the order x4, shift, x1, x2, x3, x5, cnt) in reverse push order.
; Seven pops at 2 cycles each.
macro POP_STANDARD ; 14 cycles
    pop     cnt
    pop     x5
    pop     x3
    pop     x2
    pop     x1
    pop     shift
    pop     x4
    endm
; POP_RETI undoes the three pushes done at interrupt entry (YL, SREG, YH):
; YH is restored first, then SREG is restored through YL, finally YL itself.
macro POP_RETI     ; 7 cycles
    pop     YH
    pop     YL
    out     SREG, YL
    pop     YL
    endm

;macro CRC_CLEANUP_AND_CHECK
;   ; the last byte has already been xored with the lower crc byte, we have to do the table lookup and xor
;   ; x3 is the higher crc byte, zl the lower one
;   ldi     ZH, hi8(usbCrcTableHigh);[+1] get the new high byte from the table
;   lpm     x2, Z               ;[+2][+3][+4]
;   ldi     ZH, hi8(usbCrcTableLow);[+5] get the new low xor byte from the table
;   lpm     ZL, Z               ;[+6][+7][+8]
;   eor     ZL, x3              ;[+7] xor the old high byte with the value from the table, x2:ZL now holds the crc value
;   cpi     ZL, 0x01            ;[+8] if the crc is ok we have a fixed remainder value of 0xb001 in x2:ZL (see usb spec)
;   brne    ignorePacket        ;[+9] detected a crc fault -> paket is ignored and retransmitted by the host
;   cpi     x2, 0xb0            ;[+10]
;   brne    ignorePacket        ;[+11] detected a crc fault -> paket is ignored and retransmitted by the host
;    endm
        

; Interrupt entry of the USB receiver. Saves YL, SREG and YH, then polls D-
; to lock onto the sync pattern. Execution continues at haveTwoBitsK once two
; consecutive K bits (the end of the sync byte) were sampled. If no K state is
; seen within the waitForK sampling window, control goes to sofError (not
; defined in this file).
USB_INTR_VECTOR:
;order of registers pushed: YL, SREG, YH, [sofError], x4, shift, x1, x2, x3, x5, cnt
    push    YL                  ;[-28] push only what is necessary to sync with edge ASAP
    in      YL, SREG            ;[-26]
    push    YL                  ;[-25]
    push    YH                  ;[-23]
;----------------------------------------------------------------------------
; Synchronize with sync pattern:
;----------------------------------------------------------------------------
;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
;sync up with J to K edge during sync pattern -- use fastest possible loops
;The first part waits at most 1 bit long since we must be in sync pattern.
;YL is guaranteed to be < 0x80 because I flag is clear. When we jump to
;waitForJ, ensure that this prerequisite is met.
waitForJ:
    inc     YL                  ; YL doubles as timeout counter, the loop ends when it wraps to 0
    sbis    USBIN, USBMINUS     ; D- high (J): skip the branch and fall into waitForK
    brne    waitForJ        ; just make sure we have ANY timeout
waitForK:
;The following code results in a sampling window of < 1/4 bit which meets the spec.
;D- is sampled nine times back to back; the first sample that reads low (K)
;leaves the chain through foundK.
    sbis    USBIN, USBMINUS     ;[-17]
    rjmp    foundK              ;[-16]
    sbis    USBIN, USBMINUS
    rjmp    foundK
    sbis    USBIN, USBMINUS
    rjmp    foundK
    sbis    USBIN, USBMINUS
    rjmp    foundK
    sbis    USBIN, USBMINUS
    rjmp    foundK
    sbis    USBIN, USBMINUS
    rjmp    foundK
    sbis    USBIN, USBMINUS
    rjmp    foundK
    sbis    USBIN, USBMINUS
    rjmp    foundK
    sbis    USBIN, USBMINUS
    rjmp    foundK
;no K state found in any of the samples: optionally count the event and run
;the user hook, then leave through sofError
#if USB_COUNT_SOF
    lds     YL, usbSofCount
    inc     YL
    sts     usbSofCount, YL
#endif  /* USB_COUNT_SOF */
#ifdef USB_SOF_HOOK
    USB_SOF_HOOK
#endif
    rjmp    sofError
foundK:                         ;[-15]
;{3, 5} after falling D- edge, average delay: 4 cycles
;bit0 should be at 30  (2.5 bits) for center sampling. Currently at 4 so 26 cycles till bit 0 sample
;use 1 bit time for setup purposes, then sample again. Numbers in brackets
;are cycles from center of first sync (double K) bit after the instruction
    push    x4                  ;[-14]
;   [---]                       ;[-13]
    lds     YL, usbInputBufOffset;[-12] used to toggle the two usb receive buffers
;   [---]                       ;[-11]
    clr     YH                  ;[-10]
    subi    YL, lo8(-(usbRxBuf));[-9] [rx loop init] Y = usbRxBuf + usbInputBufOffset (add by subtracting the negative)
    sbci    YH, hi8(-(usbRxBuf));[-8] [rx loop init]
    push    shift               ;[-7]
;   [---]                       ;[-6]
    ldi     shift, 0x80         ;[-5] the last bit is the end of byte marker for the pid receiver loop
    clc                         ;[-4] the carry has to be clear for receipt of pid bit 0
    sbis    USBIN, USBMINUS     ;[-3] we want two bits K (sample 3 cycles too early)
    rjmp    haveTwoBitsK        ;[-2]
    pop     shift               ;[-1] undo the push from before
    pop     x4                  ;[1]
    rjmp    waitForK            ;[3] this was not the end of sync, retry
; The entire loop from waitForK until rjmp waitForK above must not exceed two
; bit times (= 24 cycles).

;----------------------------------------------------------------------------
; push more registers and initialize values while we sample the first bits:
;----------------------------------------------------------------------------
; Entered from foundK with x4 and shift already pushed, shift = 0x80 and the
; carry flag cleared. Saves x1, x2, x3, x5 and cnt, presets x2 with the K line
; state and cnt with USB_BUFSIZE, then falls through into bitloopPid.
haveTwoBitsK:
    push    x1                  ;[0]
    push    x2                  ;[2]
    push    x3                  ;[4] 
    ldi     x2, 1<<USBPLUS      ;[6] [rx loop init] current line state is K state. D+=="1", D-=="0"
    push    x5                  ;[7] used by tx loop for bitcnt
    push    cnt                 ;[9]
    ldi     cnt, USB_BUFSIZE    ;[11]


;--------------------------------------------------------------------------------------------------------------
; receives the pid byte
; there is no real unstuffing algorithm implemented here as a stuffing bit is impossible in the pid byte.
; That is because the last four bits of the byte are the inverse of the first four bits. If we detect an
; unstuffing condition something went wrong and we abort
; shift has to be initialized to 0x80
;
; One loop pass takes 12 cycles (one bit time). Each pass rotates the inverted bit into shift from the top;
; the 0x80 marker reaches the carry flag after eight passes and ends the loop. On exit x4 holds the
; re-inverted (true) pid byte and x2 the last sampled line state.
;--------------------------------------------------------------------------------------------------------------

bitloopPid:                     
    in      x1, USBIN           ;[0] sample line state
    andi    x1, USBMASK         ;[1] filter only D+ and D- bits
    breq    nse0                ;[2] both lines are low so handle se0   
    eor     x2, x1              ;[3] generate inverted of actual bit
    sbrc    x2, USBMINUS        ;[4] set the carry if we received a zero
    sec                         ;[5]
    ror     shift               ;[6]
    nop                         ;[7] filler cycle to keep the 12 cycle bit time (no crc register is set up in this file)
    ser     x4                  ;[8] there is no bit stuffing check here as the pid bit cannot be stuffed. if so
                                ; some error occurred. In this case the packet is discarded later on anyway.
    mov     x2, x1              ;[9] prepare for the next cycle
    brcc    bitloopPid          ;[10] while 0s drop out of shift we get the next bit
    eor     x4, shift           ;[11] invert all bits in shift and store result in x4

;--------------------------------------------------------------------------------------------------------------
; receives data bytes (the code in this file does not calculate a crc)
; the last USBIN state has to be in x2
; this is only the first half (bits 0 to 3), due to branch distance limitations the second half of the loop
; (rxDataBit4) is near the end of this asm file
;
; Every bit slot is 12 cycles long. x1 and x2 alternate between current and previous line sample. The
; inverted bit (1 = line state changed) is copied into shift through the T flag, then the six most recently
; received bit positions are masked: if all of them are zero (six ones in the data stream) the next bit is
; a stuffing bit and the matching unstuffN handler is run.
;--------------------------------------------------------------------------------------------------------------

rxDataStart:
    in      x1, USBIN           ;[0] sample line state (note: a se0 check is not useful due to bit dribbling)
    ser     x3                  ;[1] prepare the unstuff marker register
    eor     x2, x1              ;[2] generates the inverted of the actual bit
    bst     x2, USBMINUS        ;[3] copy the bit from x2
    bld     shift, 0            ;[4] and store it in shift
    mov     x2, shift           ;[5] make a copy of shift for unstuffing check
    andi    x2, 0xF9            ;[6] mask the last six bits, if we got six zeros (which are six ones in fact)
    breq    unstuff0            ;[7] then Z is set now and we branch to the unstuffing handler
didunstuff0:
    subi    cnt, 1              ;[8] cannot use dec because it does not affect the carry flag
    brcs    nOverflow           ;[9] Too many bytes received. Ignore packet                         
    st      Y+, x4              ;[10] store the last received byte
                                ;[11] st needs two cycles

; bit1                          
    in      x2, USBIN           ;[0] sample line state
    andi    x1, USBMASK         ;[1] check for se0 during bit 0
    breq    nse0                ;[2]
    andi    x2, USBMASK         ;[3] check se0 during bit 1
    breq    nse0                ;[4]
    eor     x1, x2              ;[5]
    bst     x1, USBMINUS        ;[6]
    bld     shift, 1            ;[7]
    mov     x1, shift           ;[8]
    andi    x1, 0xF3            ;[9] bits 0, 1 of this byte and bits 4 to 7 still left from the previous byte
    breq    unstuff1            ;[10]
didunstuff1:
    nop                         ;[11]   

; bit2
    in      x1, USBIN           ;[0] sample line state
    andi    x1, USBMASK         ;[1] check for se0 (as there is nothing else to do here)
    breq    nOverflow           ;[2] note: a se0 in this bit slot is routed to overflow, not to se0
    eor     x2, x1              ;[3] generates the inverted of the actual bit
    bst     x2, USBMINUS        ;[4]
    bld     shift, 2            ;[5] store the bit
    mov     x2, shift           ;[6]
    andi    x2, 0xE7            ;[7] if we have six zeros here (which means six 1 in the stream)
    breq    unstuff2            ;[8] the next bit is a stuffing bit
didunstuff2:
    nop2                        ;[9]
                                ;[10]
    nop                         ;[11]                   

; bit3                          
    in      x2, USBIN           ;[0] sample line state
    andi    x2, USBMASK         ;[1] check for se0
    breq    nOverflow           ;[2]
    eor     x1, x2              ;[3]
    bst     x1, USBMINUS        ;[4]
    bld     shift, 3            ;[5]
    mov     x1, shift           ;[6]
    andi    x1, 0xCF            ;[7]
    breq    unstuff3            ;[8]
didunstuff3:
    nop                         ;[9]
    rjmp    rxDataBit4          ;[10] continue with the second half of the byte
                                ;[11]               

; the avr branch instructions allow an offset of +63 instructions only, so we need this
; local copy of se0: conditional branches target nse0, which forwards to se0 with rjmp
nse0:       
    rjmp    se0                 ;[4]
                                ;[5]
; the same as for se0 is needed for overflow and stuffErr: both labels share one
; rjmp, so a bit stuffing error is handled exactly like a buffer overflow
nOverflow:
stuffErr:
    rjmp    overflow


; Unstuffing handlers for bits 0 to 3. Each handler is entered through breq when six consecutive ones were
; received. It sets the newest bit in shift (so the same six bits cannot trigger again), clears the matching
; bit in x3 so that the bit gets corrected when the byte is completed, samples the stuffing bit and checks
; that the line state changed (a stuffing bit is always a zero). It then returns to the didunstuffN label
; one bit time later, with the new line sample in the register the following bit expects.
unstuff0:                       ;[8] this is the branch delay of breq unstuffX
    andi    x1, USBMASK         ;[9] do an se0 check here (if the last crc byte ends with 5 ones we might end up here
    breq    didunstuff0         ;[10] even though the message is complete -> jump back and store the byte
    ori     shift, 0x01         ;[11] invert the last received bit to prevent further unstuffing
    in      x2, USBIN           ;[0] we have some free cycles so we could check for bit stuffing errors
    andi    x3, 0xFE            ;[1] mark this bit as inverted (will be corrected before storing shift)
    eor     x1, x2              ;[2] x1 and x2 have to be different because the stuff bit is always a zero
    andi    x1, USBMASK         ;[3] mask the interesting bits
    breq    stuffErr            ;[4] if the stuff bit is a 1-bit something went wrong
    mov     x1, x2              ;[5] the next bit expects the last state to be in x1
    rjmp    didunstuff0         ;[6]
                                ;[7] jump delay of rjmp didunstuffX 

unstuff1:                       ;[11] this is the jump delay of breq unstuffX
    in      x1, USBIN           ;[0] we have some free cycles so we could check for bit stuffing errors
    ori     shift, 0x02         ;[1] invert the last received bit to prevent further unstuffing
    andi    x3, 0xFD            ;[2] mark this bit as inverted (will be corrected before storing shift)
    eor     x2, x1              ;[3] x1 and x2 have to be different because the stuff bit is always a zero
    andi    x2, USBMASK         ;[4] mask the interesting bits
    breq    stuffErr            ;[5] if the stuff bit is a 1-bit something went wrong
    mov     x2, x1              ;[6] the next bit expects the last state to be in x2
    nop2                        ;[7]
                                ;[8]
    rjmp    didunstuff1         ;[9]
                                ;[10] jump delay of rjmp didunstuffX        

unstuff2:                       ;[9] this is the jump delay of breq unstuffX
    ori     shift, 0x04         ;[10] invert the last received bit to prevent further unstuffing
    andi    x3, 0xFB            ;[11] mark this bit as inverted (will be corrected before storing shift)
    in      x2, USBIN           ;[0] we have some free cycles so we could check for bit stuffing errors
    eor     x1, x2              ;[1] x1 and x2 have to be different because the stuff bit is always a zero
    andi    x1, USBMASK         ;[2] mask the interesting bits
    breq    stuffErr            ;[3] if the stuff bit is a 1-bit something went wrong
    mov     x1, x2              ;[4] the next bit expects the last state to be in x1
    nop2                        ;[5]
                                ;[6]
    rjmp    didunstuff2         ;[7]
                                ;[8] jump delay of rjmp didunstuffX 

unstuff3:                       ;[9] this is the jump delay of breq unstuffX
    ori     shift, 0x08         ;[10] invert the last received bit to prevent further unstuffing
    andi    x3, 0xF7            ;[11] mark this bit as inverted (will be corrected before storing shift)
    in      x1, USBIN           ;[0] we have some free cycles so we could check for bit stuffing errors
    eor     x2, x1              ;[1] x1 and x2 have to be different because the stuff bit is always a zero
    andi    x2, USBMASK         ;[2] mask the interesting bits
    breq    stuffErr            ;[3] if the stuff bit is a 1-bit something went wrong
    mov     x2, x1              ;[4] the next bit expects the last state to be in x2
    nop2                        ;[5]
                                ;[6]
    rjmp    didunstuff3         ;[7]
                                ;[8] jump delay of rjmp didunstuffX         



; the include has to be here due to branch distance restrictions
#include "asmcommon.inc"

    

; USB spec says:
; idle = J
; J = (D+ = 0), (D- = 1)
; K = (D+ = 1), (D- = 0)
; Spec allows 7.5 bit times from EOP to SOP for replies
; 7.5 bit times is 90 cycles. ...there is plenty of time


; Handshake senders. All four entry points end up with the pid byte in x3,
; point Y at the register x3 itself (data space address 20) and set cnt = 2
; (sync byte + pid byte), then fall through into usbSendAndReti.
;   sendNakAndReti: sends USBPID_NAK
;   sendAckAndReti: sends USBPID_ACK
;   sendCntAndReti: sends the byte passed in cnt
;   sendX3AndReti:  sends the byte passed in x3
sendNakAndReti:
    ldi     x3, USBPID_NAK  ;[-18]
    rjmp    sendX3AndReti   ;[-17]
sendAckAndReti:
    ldi     cnt, USBPID_ACK ;[-17]
sendCntAndReti:
    mov     x3, cnt         ;[-16]
sendX3AndReti:
    ldi     YL, 20          ;[-15] x3==r20 address is 20
    ldi     YH, 0           ;[-14]
    ldi     cnt, 2          ;[-13]
;   rjmp    usbSendAndReti      fallthrough

;usbSend:
;pointer to data in 'Y'
;number of bytes in 'cnt' -- including sync byte [range 2 ... 12]
;uses: x1...x4, bitcnt, shift, cnt, Y
;Numbers in brackets are time since first bit of sync pattern is sent
;
;The sync byte (0x80) is generated here; the bytes at Y follow it. Bits are
;sent LSb first, one bit every 12 cycles. x1 mirrors the output port, x4 is
;the mask that toggles both data lines, x2 collects the history of sent bits
;for bit stuffing. After the last byte an SE0 followed by J (EOP) is sent, the
;bus is released and control goes to doReturn (not defined in this file).

usbSendAndReti:             ; 12 cycles until SOP
    in      x2, USBDDR      ;[-12]
    ori     x2, USBMASK     ;[-11]
    sbi     USBOUT, USBMINUS;[-10] prepare idle state; D+ and D- must have been 0 (no pullups)
    in      x1, USBOUT      ;[-8] port mirror for tx loop
    out     USBDDR, x2      ;[-7] <- acquire bus
    ldi     x2, 0           ;[-6] init x2 (bitstuff history) because sync starts with 0
    ldi     x4, USBMASK     ;[-5] exor mask
    ldi     shift, 0x80     ;[-4] sync byte is first byte sent
txByteLoop:
    ldi     bitcnt, 0x40    ;[-3]=[9]     binary 01000000
txBitLoop:                  ; the loop sends the first 7 bits of the byte
    sbrs    shift, 0        ;[-2]=[10] if we have to send a 1 do not change the line state
    eor     x1, x4          ;[-1]=[11]
    out     USBOUT, x1      ;[0]
    ror     shift           ;[1]
    ror     x2              ;[2] transfers the last sent bit to the stuffing history
didStuffN:
    nop                     ;[3]
    nop                     ;[4]
    cpi     x2, 0xfc        ;[5] if we sent six consecutive ones
    brcc    bitstuffN       ;[6] (x2 >= 0xfc: the upper six history bits are all set)
    lsr     bitcnt          ;[7]
    brne    txBitLoop       ;[8] restart the loop while the 1 is still in the bitcount

; transmit bit 7
    sbrs    shift, 0        ;[9]
    eor     x1, x4          ;[10]
didStuff7:
    ror     shift           ;[11]
    out     USBOUT, x1      ;[0] transfer bit 7 to the pins
    ror     x2              ;[1] move the bit into the stuffing history 
    cpi     x2, 0xfc        ;[2]
    brcc    bitstuff7       ;[3]
    ld      shift, y+       ;[4] get next byte to transmit
    dec     cnt             ;[6] decrement byte counter
    brne    txByteLoop      ;[7] if we have more bytes start next one
                            ;[8] branch delay
                            
;make SE0:
    cbr     x1, USBMASK     ;[8]        prepare SE0 [spec says EOP may be 25 to 30 cycles]
    lds     x2, usbNewDeviceAddr;[9]
    lsl     x2              ;[11]       we compare with left shifted address
    out     USBOUT, x1      ;[0]        <-- out SE0 -- from now 2 bits = 24 cycles until bus idle
    subi    YL, 20 + 2      ;[1]        Only assign address on data packets, not ACK/NAK in x3
    sbci    YH, 0           ;[2]        (Y == 22 means the single byte sent was x3 at address 20)
;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
;set address only after data packet was sent, not after handshake
    breq    skipAddrAssign  ;[3]
    sts     usbDeviceAddr, x2       ; if not skipped: SE0 is one cycle longer
skipAddrAssign:
;end of usbDeviceAddress transfer
    ldi     x2, 1<<USB_INTR_PENDING_BIT;[5] int0 occurred during TX -- clear pending flag
    USB_STORE_PENDING(x2)   ;[6]
    ori     x1, USBIDLE     ;[7]
    in      x2, USBDDR      ;[8]
    cbr     x2, USBMASK     ;[9] set both pins to input
    mov     x3, x1          ;[10]
    cbr     x3, USBMASK     ;[11] configure no pullup on both pins
    ldi     x4, 4           ;[12]
se0Delay:                   ; four passes of 3 cycles stretch the SE0 to 24 cycles
    dec     x4              ;[13] [16] [19] [22]
    brne    se0Delay        ;[14] [17] [20] [23]
    out     USBOUT, x1      ;[24] <-- out J (idle) -- end of SE0 (EOP signal)
    out     USBDDR, x2      ;[25] <-- release bus now
    out     USBOUT, x3      ;[26] <-- ensure no pull-up resistors are active
    rjmp    doReturn

; Bit stuffing for bits 0 to 6 of a transmitted byte: entered from the brcc
; after didStuffN when six ones in a row were sent. Toggles the lines one bit
; time after the previous bit (= sends a zero), clears the history and
; re-enters the loop at didStuffN without consuming a data bit.
bitstuffN:
    eor     x1, x4          ;[8] generate a zero
    ldi     x2, 0           ;[9] reset the bit stuffing history
    nop2                    ;[10]
    out     USBOUT, x1      ;[0] <-- send the stuffing bit
    rjmp    didStuffN       ;[1]

; Bit stuffing after bit 7: same idea, but the stuffing bit is sent by the
; regular code at didStuff7. The rol cancels the ror executed there, so shift
; is left unchanged and a zero ends up in the history.
bitstuff7:
    eor     x1, x4          ;[5]
    ldi     x2, 0           ;[6] reset bit stuffing history
    clc                     ;[7] fill a zero into the shift register
    rol     shift           ;[8] compensate for ror shift at branch destination
    rjmp    didStuff7       ;[9]
                            ;[10] jump delay

;--------------------------------------------------------------------------------------------------------------
; receives data bytes
; second half of the data byte receiver loop (bits 4 to 7)
; the code in this file does not calculate a crc: the slots in which older comments mention crc work are
; filled with nops to keep every bit slot at 12 cycles
;--------------------------------------------------------------------------------------------------------------

; local branch target within reach of breq: forwards to overflow
nOverflow2:
    rjmp overflow

rxDataBit4:
    in      x1, USBIN           ;[0] sample line state
    andi    x1, USBMASK         ;[1] check for se0
    breq    nOverflow2          ;[2]
    eor     x2, x1              ;[3]
    bst     x2, USBMINUS        ;[4]
    bld     shift, 4            ;[5]
    mov     x2, shift           ;[6]
    andi    x2, 0x9F            ;[7]
    breq    unstuff4            ;[8]
didunstuff4:
    nop2                        ;[9][10]
    nop                         ;[11]

; bit5                          
    in      x2, USBIN           ;[0] sample line state
    nop                         ;[1] filler cycle
    eor     x1, x2              ;[2]
    bst     x1, USBMINUS        ;[3]
    bld     shift, 5            ;[4]
    mov     x1, shift           ;[5]
    andi    x1, 0x3F            ;[6]
    breq    unstuff5            ;[7]
didunstuff5:
    nop2                        ;[8] filler cycles
                                ;[9]
    nop                         ;[10] filler cycle
    nop                         ;[11] filler cycle

; bit6                          
    in      x1, USBIN           ;[0] sample line state
    eor     x2, x1              ;[1]
    bst     x2, USBMINUS        ;[2]
    bld     shift, 6            ;[3]
    mov     x2, shift           ;[4]
    andi    x2, 0x7E            ;[5]
    breq    unstuff6            ;[6]
didunstuff6:
    nop2                        ;[7] filler cycles
                                ;[8]
    nop                         ;[9] filler cycle
    nop                         ;[10] filler cycle
    nop                         ;[11] filler cycle
            
; bit7                          
    in      x2, USBIN           ;[0] sample line state
    eor     x1, x2              ;[1]
    bst     x1, USBMINUS        ;[2]
    bld     shift, 7            ;[3] now shift holds the complete but inverted data byte
    mov     x1, shift           ;[4]
    andi    x1, 0xFC            ;[5]
    breq    unstuff7            ;[6]
didunstuff7:
    eor     x3, shift           ;[7] x3 marks all bits which have not been inverted by the unstuffing subs
    mov     x4, x3              ;[8] keep a copy of the data byte, it will be stored during next bit0
    nop                         ;[9] filler cycle
    rjmp    rxDataStart         ;[10] next byte
                                ;[11] jump delay of rjmp rxDataStart

; Unstuffing handlers for bits 4 to 7. They work like the handlers for bits 0 to 3: set the newest bit in
; shift, clear the matching marker bit in x3, sample the stuffing bit, verify that the line state changed
; and return to the didunstuffN label one bit time later. Errors leave through stuffErr2.
unstuff4:                       ;[9] this is the jump delay of breq unstuffX
    ori     shift, 0x10         ;[10] invert the last received bit to prevent further unstuffing
    andi    x3, 0xEF            ;[11] mark this bit as inverted (will be corrected before storing shift)
    in      x2, USBIN           ;[0] we have some free cycles so we could check for bit stuffing errors
    eor     x1, x2              ;[1] x1 and x2 have to be different because the stuff bit is always a zero
    andi    x1, USBMASK         ;[2] mask the interesting bits
    breq    stuffErr2           ;[3] if the stuff bit is a 1-bit something went wrong
    mov     x1, x2              ;[4] the next bit expects the last state to be in x1
    nop2                        ;[5]
                                ;[6]
    rjmp    didunstuff4         ;[7]
                                ;[8] jump delay of rjmp didunstuffX 

unstuff5:                       ;[8] this is the jump delay of breq unstuffX
    nop                         ;[9]
    ori     shift, 0x20         ;[10] invert the last received bit to prevent further unstuffing
    andi    x3, 0xDF            ;[11] mark this bit as inverted (will be corrected before storing shift)
    in      x1, USBIN           ;[0] we have some free cycles so we could check for bit stuffing errors
    eor     x2, x1              ;[1] x1 and x2 have to be different because the stuff bit is always a zero
    andi    x2, USBMASK         ;[2] mask the interesting bits
    breq    stuffErr2           ;[3] if the stuff bit is a 1-bit something went wrong
    mov     x2, x1              ;[4] the next bit expects the last state to be in x2
    nop                         ;[5]
    rjmp    didunstuff5         ;[6]
                                ;[7] jump delay of rjmp didunstuffX                                                 

unstuff6:                       ;[7] this is the jump delay of breq unstuffX
    nop2                        ;[8]
                                ;[9]
    ori     shift, 0x40         ;[10] invert the last received bit to prevent further unstuffing
    andi    x3, 0xBF            ;[11] mark this bit as inverted (will be corrected before storing shift)
    in      x2, USBIN           ;[0] we have some free cycles so we could check for bit stuffing errors
    eor     x1, x2              ;[1] x1 and x2 have to be different because the stuff bit is always a zero
    andi    x1, USBMASK         ;[2] mask the interesting bits
    breq    stuffErr2           ;[3] if the stuff bit is a 1-bit something went wrong
    mov     x1, x2              ;[4] the next bit expects the last state to be in x1
    rjmp    didunstuff6         ;[5]
                                ;[6] jump delay of rjmp didunstuffX 

unstuff7:                       ;[7] this is the jump delay of breq unstuffX
    nop                         ;[8]
    nop                         ;[9]
    ori     shift, 0x80         ;[10] invert the last received bit to prevent further unstuffing
    andi    x3, 0x7F            ;[11] mark this bit as inverted (will be corrected before storing shift)
    in      x1, USBIN           ;[0] we have some free cycles so we could check for bit stuffing errors
    eor     x2, x1              ;[1] x1 and x2 have to be different because the stuff bit is always a zero
    andi    x2, USBMASK         ;[2] mask the interesting bits
    breq    stuffErr2           ;[3] if the stuff bit is a 1-bit something went wrong
    mov     x2, x1              ;[4] the next bit expects the last state to be in x2
    rjmp    didunstuff7         ;[5]
                                ;[6] jump delay of rjmp didunstuff7

; local copy of the stuffErr destination for the second half of the receiver loop:
; the breq instructions of unstuff4 to unstuff7 branch here, and this rjmp forwards to stuffErr
stuffErr2:
    rjmp    stuffErr
